# -*- coding: UTF-8 -*-
import requests
import sys
import pymongo

# Python 2 only: reload the sys module so that setdefaultencoding can be
# called, then make UTF-8 the codec used for implicit str <-> unicode
# conversion.
# NOTE(review): the byte-string literals passed to .strip() on scraped text
# further down presumably depend on this setting -- confirm before removing.
reload(sys)
sys.setdefaultencoding('utf-8')

from bs4 import BeautifulSoup

# Connect to the MongoDB server, authenticate against the `lianjia` database
# and keep a handle on the `ershoufang` collection that listings are written to.
# NOTE(review): the host address and the credentials are hard-coded in source;
# consider loading them from configuration instead.
connection = pymongo.MongoClient(host='139.224.135.121', port=27017)
db = connection['lianjia']
db.authenticate("developer", "qwerqwer")
ershoufang = db['ershoufang']

# Crawl the paginated listing pages (.../ershoufang/d1, d2, ...) and insert one
# document per listing into the `ershoufang` collection. The crawl stops at the
# first page that contains no listing title link.
title_attrs = {'class': 'text link-hover-green js_triggerGray js_fanglist_title'}
i = 1  # page number currently being fetched
j = 1  # 1-based counter of documents inserted so far
while True:
	# The timeout keeps a single stalled connection from hanging the crawl
	# forever; a timed-out request raises instead of blocking.
	r = requests.get('http://sh.lianjia.com/ershoufang/d' + str(i), timeout=30)
	soup = BeautifulSoup(r.text, 'lxml')
	# A page without any listing title link means we ran past the last page.
	if soup.find('a', attrs=title_attrs) is None:
		break
	ul = soup.find('ul', attrs={'class': 'js_fang_list'})
	if ul is None:
		# No list container to walk; stop cleanly rather than fail with an
		# AttributeError on None.
		break
	for li in ul.find_all('li'):
		# Fresh dict holding the fields of this listing.
		msg = {}
		# Guard each listing separately so one malformed entry does not stop
		# the crawl.
		try:
			# Locate the elements that carry the listing's fields.
			title = li.find('a', attrs=title_attrs)
			img = li.find('a', attrs={'class': 'img js_triggerGray'}).img
			# Both info rows are split on whitespace and read at even indices
			# only; the odd-indexed tokens are presumably separators.
			guige = li.find('span', attrs={'class': 'info-col row1-text'}).text.split()
			tmp = li.find('span', attrs={'class': 'info-col row2-text'}).text.split()
			price = li.find('span', attrs={'class': 'info-col price-item minor'})
			total = li.find('span', attrs={'class': 'total-price strong-num'})

			msg['title'] = title.text.strip()
			msg['imgurl'] = img.attrs['data-img-real']
			msg['shiting'] = guige[0]
			# Drop the trailing unit character before converting to a number.
			msg['mianji'] = float(guige[2].strip('平'))
			msg['cengshu'] = guige[4]
			# The fourth value of the first row is optional.
			if len(guige) > 6:
				msg['chaoxiang'] = guige[6]
			msg['xiaoqu'] = tmp[0]
			msg['qu'] = tmp[2]
			msg['lu'] = tmp[4]
			# The construction year is optional as well.
			if len(tmp) > 6:
				msg['year'] = int(tmp[6].strip('年建'))
			# Strip the label prefix and the unit suffix around the number.
			msg['price'] = int(price.text.strip().strip('单价').strip('元/平'))
			msg['total'] = int(total.text.strip())
			ershoufang.insert(msg)
			print("第" + str(j) + "条数据已插入数据库")
			j = j + 1
		except Exception as e:
			# Catch Exception rather than everything so that Ctrl-C and
			# SystemExit still stop the script, and show what went wrong.
			print('出了点小问题: ' + repr(e))
	i = i + 1
